{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import scipy.stats as stats\n",
    "import seaborn as sns\n",
    "from sklearn import preprocessing\n",
    "\n",
    "%matplotlib inline\n",
    "pd.set_option(\"display.max_colwidth\",100)\n",
    "sns.set(style=\"ticks\", color_codes=True)\n",
    "\n",
    "def plot_data(data, x, y, x_title='x', y_title='y', xlim=None, ylim=None, size=4, alpha=0.02):\n",
    "    slope, intercept, r_value, p_value, std_err = stats.linregress(data[x],data[y])\n",
    "    print '# of UTRs = ', len(data)\n",
    "    print 'r-squared = ',r_value**2\n",
    "\n",
    "    sns.set(style=\"ticks\", color_codes=True)\n",
    "    g = sns.JointGrid(data=data, x=x, y=y, xlim=xlim, ylim=ylim, size=size)\n",
    "    g = g.plot_joint(plt.scatter, color='#e01145', edgecolor=\"black\", alpha=alpha)\n",
    "    f = g.fig\n",
    "    f.text(x=0, y=0, s='r2 = {}'.format(round(r_value**2, 3)))\n",
    "    g = g.plot_marginals(sns.distplot, kde=False, color='#e01145')\n",
    "    g = g.set_axis_labels(x_title, y_title)\n",
    "    \n",
    "import keras\n",
    "np.random.seed(1337)\n",
    "from keras.preprocessing import sequence\n",
    "from keras.optimizers import RMSprop\n",
    "from keras.models import Sequential\n",
    "from keras.layers.core import Dense\n",
    "from keras.layers.core import Dropout\n",
    "from keras.layers.core import Activation\n",
    "from keras.layers.core import Flatten\n",
    "from keras.layers.convolutional import Convolution1D\n",
    "from keras.constraints import maxnorm\n",
    "\n",
    "def one_hot_encode(df, col='utr', seq_len=50):\n",
    "    # Dictionary returning one-hot encoding of nucleotides. \n",
    "    nuc_d = {'a':[1,0,0,0],'c':[0,1,0,0],'g':[0,0,1,0],'t':[0,0,0,1], 'n':[0,0,0,0]}\n",
    "    \n",
    "    # Creat empty matrix.\n",
    "    vectors=np.empty([len(df),seq_len,4])\n",
    "    \n",
    "    # Iterate through UTRs and one-hot encode\n",
    "    for i,seq in enumerate(df[col].str[:seq_len]): \n",
    "        seq = seq.lower()\n",
    "        a = np.array([nuc_d[x] for x in seq])\n",
    "        vectors[i] = a\n",
    "    return vectors\n",
    "\n",
    "def test_data(df, model, test_seq, obs_col, output_col='pred'):\n",
    "    scaler = preprocessing.StandardScaler()\n",
    "    scaler.fit(df[obs_col].reshape(-1,1))\n",
    "    #df.loc[:,'obs_stab'] = test_df['stab_df']\n",
    "    predictions = model.predict(test_seq).reshape(-1)\n",
    "    df.loc[:,output_col] = scaler.inverse_transform(predictions)\n",
    "    return df\n",
    "\n",
    "def binarize_sequences(df, col='utr', seq_len=54):\n",
    "    vector=np.empty([len(df),seq_len,4])\n",
    "    for i,seq in enumerate(df[col].str[:seq_len]):\n",
    "        vector[i]=vectorizeSequence(seq.lower())\n",
    "    return vector\n",
    "\n",
    "def vectorizeSequence(seq):\n",
    "    # the order of the letters is not arbitrary.\n",
    "    # Flip the matrix up-down and left-right for reverse compliment\n",
    "    ltrdict = {'a':[1,0,0,0],'c':[0,1,0,0],'g':[0,0,1,0],'t':[0,0,0,1], 'n':[0,0,0,0]}\n",
    "    return np.array([ltrdict[x] for x in seq])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load data to get MRL values for scaling - convert the scaled predictions of the evolved sequences to MRL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "StandardScaler(copy=True, with_mean=True, with_std=True)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('../data/egfp_unmod_1.csv')\n",
    "df.sort_values('total_reads', ascending=False).reset_index(drop=True)\n",
    "\n",
    "# Select a number of UTRs for the purpose of scaling.\n",
    "scale_utrs = df[:40000]\n",
    "\n",
    "# Scale\n",
    "scaler = preprocessing.StandardScaler()\n",
    "scaler.fit(scale_utrs['rl'].reshape(-1,1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Choose model to use for evolving new sequences.\n",
    "In the paper, the model used for evolving sequences is saved here as \"evolution_model.hdf5\". This is an older model that predates the main egfp model used in the paper (trained on unmodified EGFP replicate #1). The main model used in the paper is labeled \"main_MRL_model.hdf5\". Both retrained models are also available."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.models import load_model\n",
    "# model = load_model('../modeling/saved_models/evolution_model.hdf5')\n",
    "# model = load_model('../modeling/saved_models/retrained_evolution_model.hdf5')\n",
    "# model = load_model('../modeling/saved_models/main_MRL_model.hdf5')\n",
    "model = load_model('../modeling/saved_models/retrained_main_MRL_model.hdf5')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Genetic algorithm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "import math\n",
    "\n",
    "def ret_rand_nuc():\n",
    "    x = random.randint(0,3)\n",
    "    if x == 0:\n",
    "        return [1,0,0,0] # A\n",
    "    if x == 1:\n",
    "        return [0,1,0,0] # C\n",
    "    if x == 2:\n",
    "        return [0,0,1,0] # G\n",
    "    if x == 3:\n",
    "        return [0,0,0,1] # T\n",
    "    \n",
    "def vector_to_nuc(arr, seq_len=50):\n",
    "    seq = ''\n",
    "    for i in range(seq_len):\n",
    "        if arr[i,0] == 1:\n",
    "            seq = seq + 'A'\n",
    "        if arr[i,1] == 1:\n",
    "            seq = seq + 'C'\n",
    "        if arr[i,2] == 1:\n",
    "            seq = seq + 'G'\n",
    "        if arr[i,3] == 1:\n",
    "            seq = seq + 'T'\n",
    "    return seq\n",
    "\n",
    "def convert_and_save(sequences, predictions):\n",
    "    # Convert the one-hot encoded sequences to A, C, T, G\n",
    "    seqs = []\n",
    "    for nbr in xrange(len(sequences)):\n",
    "        seqs.append(vector_to_nuc(sequences[nbr]))\n",
    "    df = pd.DataFrame(data=[seqs,predictions.tolist()]).transpose()\n",
    "    df.columns = ['utr', 'prediction']\n",
    "    df.sort_values('prediction', ascending=False, inplace=True)\n",
    "    return df\n",
    "\n",
    "def make_random_sequences(nbr_sequences, length, constant='', no_uaug=False, no_stop=False):\n",
    "    # Make randomized sequences, allowing for the inclusion / exclusion of uATGs / stop codons\n",
    "    seqs = []\n",
    "    nucs = {0:'A', 1:'T', 2:'C', 3:'G'}\n",
    "    i = 0\n",
    "    while i < nbr_sequences:\n",
    "        new_seq = ''\n",
    "        for n in range(length - len(constant)):\n",
    "            new_seq = new_seq + nucs[random.randint(0,3)]\n",
    "        \n",
    "        if no_uaug == False or (no_uaug==True and 'ATG' not in new_seq):\n",
    "            if no_stop == False or (no_stop == True and ('TAG' not in new_seq and 'TGA' not in new_seq and 'TAA' not in new_seq)):\n",
    "                new_seq = new_seq + constant\n",
    "                seqs.append(new_seq)\n",
    "                i+=1\n",
    "    return seqs\n",
    "\n",
    "def simple_mutate(seq, nbr_bases=1, prob=1):\n",
    "    if nbr_bases > 1 and prob > random.random():\n",
    "        nbr_bases = nbr_bases\n",
    "    else:\n",
    "        nbr_bases = 1\n",
    "    for i in range(nbr_bases):\n",
    "        pos = random.randint(0, 49)\n",
    "        seq[pos] = ret_rand_nuc()\n",
    "    return seq\n",
    "\n",
    "def check_for_uaug(seq):\n",
    "    seq = vector_to_nuc(seq)\n",
    "    return 'ATG' in seq[:50]\n",
    "\n",
    "def check_for_stops(seq):\n",
    "    seq = vector_to_nuc(seq)\n",
    "    if 'TAG' in seq[:50] or 'TGA' in seq[:50] or 'TAA' in seq[:50]:\n",
    "        return True\n",
    "    return False\n",
    "\n",
    "def negative_selection(seq, model, scaler, target_val, no_uaug=False, no_stop=False, nbr_bases_to_mutate=1, multi_mutate_prob=1):\n",
    "    seqs = np.empty([2,54,4])\n",
    "    seqs[0] = seq.copy()\n",
    "    seqs[1] = simple_mutate(seq.copy(), nbr_bases=nbr_bases_to_mutate, prob=multi_mutate_prob)\n",
    "    \n",
    "    if no_uaug == True and check_for_uaug(seqs[1]):\n",
    "        return seqs[0]\n",
    "    if no_stop == True and check_for_stops(seqs[1]):\n",
    "        return seqs[0]\n",
    "    \n",
    "    scores = model.predict(seqs).reshape(-1)\n",
    "    scores = scaler.inverse_transform(scores)\n",
    "    if scores[1] < scores[0]:\n",
    "        if scores[1] >= target_val:\n",
    "            return seqs[1]\n",
    "        else:\n",
    "            return seqs[0]\n",
    "    else:\n",
    "        return seqs[0]    \n",
    "\n",
    "def selection(seq, model, scaler, target_val, no_uaug=False, no_stop=False, nbr_bases_to_mutate=1, multi_mutate_prob=1):\n",
    "    seqs = np.empty([2,50,4])\n",
    "    seqs[0] = seq.copy()\n",
    "    seqs[1] = simple_mutate(seq.copy(), nbr_bases=nbr_bases_to_mutate, prob=multi_mutate_prob)\n",
    "    \n",
    "    if no_uaug == True and check_for_uaug(seqs[1]):\n",
    "        return seqs[0]\n",
    "    if no_stop == True and check_for_stops(seqs[1]):\n",
    "        return seqs[0]\n",
    "    \n",
    "    scores = model.predict(seqs).reshape(-1)\n",
    "    scores = scaler.inverse_transform(scores)\n",
    "    if scores[1] > scores[0]:\n",
    "        if scores[1] <= target_val:\n",
    "            return seqs[1]\n",
    "        else:\n",
    "            return seqs[0]\n",
    "    else:\n",
    "        return seqs[0]    \n",
    "    \n",
    "def selection_to_target(seq, model, scaler, target_val, no_uaug=False, no_stop=False, nbr_bases_to_mutate=1, multi_mutate_prob=1, seq_len=50, accept_range=0.1):\n",
    "    seqs = np.empty([2,seq_len,4])\n",
    "    # Save the incoming sequence before mutating\n",
    "    seqs[0] = seq.copy()\n",
    "    # The mutated sequence\n",
    "    seqs[1] = simple_mutate(seq.copy(), nbr_bases=nbr_bases_to_mutate, prob=multi_mutate_prob)\n",
    "    \n",
    "    # Decide whether to continue with the new sequence based on the uAUG / stop codon preference\n",
    "    if no_uaug == True and check_for_uaug(seqs[1]):\n",
    "        return seqs[0]\n",
    "    if no_stop == True and check_for_stops(seqs[1]):\n",
    "        return seqs[0]\n",
    "    \n",
    "    scores = model.predict(seqs).reshape(-1)\n",
    "    scores = scaler.inverse_transform(scores)\n",
    "    \n",
    "    # Accept sequences that fall within this range. May provide more sequence diversity\n",
    "    if scores[0] >= target_val - accept_range and scores[0] <= target_val + accept_range:\n",
    "        return seqs[0]\n",
    "    else:\n",
    "        if abs(target_val - scores[1]) <= abs(target_val - scores[0]):\n",
    "            return seqs[1]\n",
    "        else:\n",
    "            return seqs[0]    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "_____"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Evolve new sequences to hit target MRLs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Working on target_rl 5 with 10 sequences: 20 40 60 80 100 120 140 160 180 200 220 240 260 280 300 320 340 360 380 400 420 440 460 480 500 520 540 560 580 600 620 640 660 680 700 720 740 760 780 800\n",
      "Working on target_rl 8 with 10 sequences: 20 40 60 80 100 120 140 160 180 200 220 240 260 280 300 320 340 360 380 400 420 440 460 480 500 520 540 560 580 600 620 640 660 680 700 720 740 760 780 800\n"
     ]
    }
   ],
   "source": [
    "# Dictionary where new sequences are saved\n",
    "evolved_seqs = {}\n",
    "\n",
    "# Number of evolution iterations\n",
    "iterations = 800\n",
    "# Number of bases to mutate if the probability to 'multi-mutate' is exceeded\n",
    "nbr_bases_to_mutate = 2\n",
    "# Probability to change multiple bases in an iteration\n",
    "prob_of_multi_mutation = 0.5\n",
    "# If using the original evolution model, set seq_len to 54. That model was\n",
    "# trained on UTRs that included the first for basees of the CDS (ATGG).\n",
    "seq_len = 50\n",
    "# Choose target MRLs and the number of sequences to create for each\n",
    "targets = [5, 8]\n",
    "seqs_per_target = [10, 10]\n",
    "# Choose whether or not to allow uAUGs and / or stop codons\n",
    "no_uaug = True\n",
    "no_stop = False\n",
    "\n",
    "for target_rl, nbr_sequences in zip(targets, seqs_per_target):\n",
    "    print 'Working on target_rl {} with {} sequences:'.format(target_rl, nbr_sequences),\n",
    "    \n",
    "    # Randomly generate starting sequences for evolving\n",
    "    rand_seqs = make_random_sequences(nbr_sequences, seq_len, no_uaug=no_uaug, no_stop=no_stop)\n",
    "    test_sequences = np.empty([len(rand_seqs), seq_len, 4])\n",
    "    i = 0\n",
    "    \n",
    "    # One-hot encode sequences\n",
    "    for seq in rand_seqs:\n",
    "        test_sequences[i] = vectorizeSequence(seq.lower())\n",
    "        i += 1\n",
    "    \n",
    "    # Evolve sequences\n",
    "    for generation in range(0, iterations):\n",
    "        for i in range(len(test_sequences)):\n",
    "            test_sequences[i] = selection_to_target(seq=test_sequences[i], model=model, scaler=scaler, target_val=target_rl,no_uaug=no_uaug,\n",
    "                                        no_stop=no_stop, nbr_bases_to_mutate=nbr_bases_to_mutate, multi_mutate_prob=prob_of_multi_mutation, seq_len=seq_len)\n",
    "\n",
    "        if (generation + 1) %  20 == 0:\n",
    "            print generation + 1,            \n",
    "    print\n",
    "    # Final prediction then convert to text sequence\n",
    "    predictions = model.predict(test_sequences).reshape(-1)\n",
    "    predictions = scaler.inverse_transform(predictions)\n",
    "    converted_df = convert_and_save(test_sequences,predictions)\n",
    "    \n",
    "    evolved_seqs[target_rl] = converted_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python2.7/dist-packages/matplotlib/font_manager.py:1320: UserWarning: findfont: Font family [u'sans-serif'] not found. Falling back to DejaVu Sans\n",
      "  (prop.get_family(), self.defaultFamily[fontext]))\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAewAAAFWCAYAAACre65zAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3X2M1OXd7/HPb2b2iZ1lYZd94Eks\nFqzFBr0Dt6VaOEAAU6EiPpXYRhFjmtqllbulorXpIbmrJydHbSAnJ9RzIr1LiVoVvKEnKmurHqBq\nLUJVQBFUFmF3GfaBfd75ze/8MTuzM7C7Mywz87tm5/1KSGFmdubbHxM/fK/r+l2X5TiOIwAAYDSP\n2wUAAIDECGwAALIAgQ0AQBYgsAEAyAIENgAAWYDABgAgCxDYAABkAQIbAIAsQGADAJAFCGwAALIA\ngQ0AQBbwuV1AV1eXPvjgA1VUVMjr9bpdDgAAaWXbthobG3X11VersLAw6Z9zPbA/+OAD3XXXXW6X\nAQBARm3dulWzZs1K+vWuB3ZFRYWkcOHV1dUuVwMAQHqdPn1ad911VzT/kuV6YEeGwaurqzVp0iSX\nqwEAIDMudhqYRWcAAGQBAhsAgCxAYAMAkAUIbAAAsgCBDQBAFiCwAQDIAq7f1mW6Z555Rs8//7ws\ny9L06dP12GOPqaCgwO2yAAA5hg57CPX19fr973+vF154QTt37pRt29q1a5fbZQEAchCBnYBt2+rq\n6lIwGFRXV5cqKyvdLgkAkIOyYkj8//znh9pz4GRK3/P6mRN177IZQ76mqqpK9957r+bPn6+CggJd\nf/31uuGGG1JaBwDAXfuPNGj8uGJVlxe7XcqQ6LCH0NLSotraWtXW1uqtt95SZ2enduzY4XZZAIAU\nCbR06leb9+mB//4Xt0tJKCs67HuXzUjYDafD3r17NWnSJJWVlUmSFi9erP379+vmm2/OeC0AgNQ7\n09wpSerptV2uJDE67CFMmDBBBw4cUGdnpxzH0b59+3TFFVe4XRYAIEU6u4Nul5C0rOiw3TJz5kwt\nWbJEt9xyi3w+n6666irdeeedbpcFAEgRAnsEWbNmjdasWeN2GQCANOjpDbldQtIYEgcA5KzeoPlz\n1xEJO+xTp05p3bp1CgQCsixLd9xxh+6++25t3LhRzz33XHRB1tq1azVv3ry0FwwAQKr0Bvs7bNsO\nyes1t49NGNher1cPPfSQZsyYoba2Nt166626/vrrJUn33HOPVq9enfYiAQBIh9jA7g1meWBXVlZG\nd/fy+/2aOnWq6uvr014YAADpFhvYPcGQCg0+KuKi/ilRV1enQ4cOaebMmZKkrVu3atmyZVq/fr1a\nWlrSUiAAAOkSDMUPiZss6cBub2/XmjVr9PDDD8vv92vlypV67bXXtGPHDlVWVurxxx9P+B4bN27U\nlVdeGfdr4cKFl/R/AACA4bJtJ/r7XsMDO6nbunp7e7VmzRotW7ZMixcvliSNGzcu+vztt9+uH/7w\nhwnfp6amRjU1NXGP1dXVGR3aCxYsUHFxsTwej7xer1588UW3SwIApEgwJqSD2R7YjuPokUce0dSp\nU7Vq1aro4w0NDdG57d27d2vatGnpq9JlW7Zsia6GBwCMHMGYDjsYzPLAfu+997Rjxw5Nnz49uof2\n2rVrtXPnTh0+fFiSNHHiRG3YsCG9lQIAkGJ2XIftDPFK9yUM7FmzZunIkSMXPJ7Je67/4/0X9LcT\n/0jpe35z8r/oB9fcmtRrV69eLcuydOedd7I1KQCMICNqSDzXbdu2TVVVVQoEAlq1apWmTp2q2bNn\nu10WACAF7FB/V21ne4dtgh9cc2vS3XCqVVVVSZLKy8u1aNEiHTx4kMAGgBEiFBPYsbd4mcjcLV0M\n0NHRoba2tujv9+zZM6IX1wFAronvsM0O7KzosN0SCAT0wAMPSJJs29bSpUs1d+5cl6sCAKRK7DB4\n1i86y2WTJ0/Wyy+/7HYZAIA0sWOGwWOHx03EkDgAIGfFDombvkqcwAYA5KzYIXGbDhsAADPZI/Hw\nDwAARprYeWs6bAAADGUT2AAAmC/kZM992AQ2ACBnsegMAIAsENthm75xCoENAMhZsV01G6cAAGCo\n+FXizGEDAGCkEB02AADm47YuAACyAB02AABZIO4+bAIbAAAzsTUpAABZIL7DZpU4AABGipvDZuMU\nAADMFDck7hDYAAAYKRRy5LH6f28yAhsAkLNCjiOvNxyFNkPiAACYyQ458va12CGGxAEAMFMo5MhH\nhw0AgNkcR9EOm9u6AAAwVNwcNovOAAAwUyjkyOu1or83GYENAMhZjuPI56HDBgDAWI7jKOSov8Nm\nlTgAAOaJNNTevg6brUkBADBQZM6aDhsAAIM5fQHdf1sXgQ0AgHEiHbZlWfJ4LFaJAwBgosgQuMcK\n/yKwAQAwUCSfIx02O50BAGCg/iFxyWNZMjyvCWwAQG6KBLbHsuSx6LABADBSZJW45bFkeSxu6wIA\nwESRgLYUXnTG8ZoAABgoMgLu8YSHxOmwAQAwkBPTYVtZcB+2L9ELTp06pXXr1ikQCMiyLN1xxx26\n++671dzcrAcffFAnT57UxIkT9dRTT6m0tDQTNQMAcMmiQ+LRRWdmB3bCDtvr9eqhhx7Sn//8Zz37\n7LP64x//qKNHj2rz5s2aM2eOXn31Vc2ZM0ebN2/ORL0AAKTEiNs4pbKyUjNmzJAk+f1+TZ06VfX1\n9aqtrdXy5cslScuXL9fu3bvTWykAACkUuzWp5TG/w044JB6rrq5Ohw4d0syZMxUIBFRZWSlJqqio\nUCAQSPjzGzdu1KZNm4ZXKQAAKeTE7HTmtUbAHHZEe3u71qxZo4cfflh+vz/uOcuyZFlWwveoqalR\nTU1N3GN1dXVauHBhsmUAAJASsTudZUOHndQq8d7eXq1Zs0bLli3T4sWLJUnl5eVqaGiQJDU0NKis\nrCx9VQIAkGLROWyPNTLmsB3H0SOPPKKpU6dq1apV0ccXLFig7du3S5K2b99OlwwAyCrRIXEpK+7D\nTjgk/t5772nHjh2aPn26br75ZknS2rVrdf/99+unP/2p/vSnP2nChAl66qmn0l4sAACpMuIWnc2a\nNUtHjhwZ8LktW7akvCAAADKh/z7syGldZgc2O50BAHJS/33Y4Y1TJLPnsQlsAEBOcvr2ErcsS56+\nNDR5WJzABgDkpPOHxGMfMxGBDQDISbGBbXkYEgcAwEjOAHPYDIkDAGCYuDlsFp0BAGAmO25IPPwY\ngQ0AgGEGHhIPuVnSkAhsAEBOij38wxNddOZmRUMjsAEAOSn2eE06bAAADBV/H3b8YyYisAEAOen8\nwz9iHzMRgQ0AyEmRZpr7sAEAMFgo5kBsDx02AABm6r+tq38Omw4bAADDxM5hs9MZAACGCsXMYbPo\nDAAAQzkDHK/JkDgAAIaJBrYYEgcAwFih6E5nHP4BAICxYvcS90aGxNnpDAAAs/TPYbPoDAAAY4UG\nOPyDwAYAwDADrxLntC4AAIwSt3GKh9u6AAAwUv/GKeEuW2JIHAAA48QuOvOy6AwAADPFzWEzJA4A\ngJlC/adrymJrUgAAzMRpXQAAZIHoedgeVokDAGCs2CHxvrzmPmwAAEwTu0rcE10l7mZFQyOwAQA5\nKTTQKnHb3MQmsAEAOWnARWec1gUAgFmcmJ3OPGycAgCAmeLmsLkPGwAAM/Ufr9m/lziBDQCAYWLn\nsL0sOgMAwEwD7SVucIMtn9sFAADghlDcHHb4MZM3TiGwAQA5yYnZ6SyS2CHb3BabwAYA5KTYOey+\nBju7F52tX79ec+bM0dKlS6OPbdy4Ud/+9rd188036+abb9Ybb7yR1iIBAEi1yJC4x1L/ojODAzth\nh71ixQp9//vf1y9+8Yu4x++55x6tXr06bYUBAJBO0SFxK/a0LnPnsBN22LNnz1ZpaWkmagEAIGMG\nWiVucoc97Nu6tm7dqmXLlmn9+vVqaWlJZU0AAKRdaICdzkxedDaswF65cqVee+017dixQ5WVlXr8\n8ceT+rmNGzfqyiuvjPu1cOHC4ZQAAMAlcWJ2OhuxHfa4cePk9Xrl8Xh0++2365///GdSP1dTU6Mj\nR47E/aqtrR1OCQAAXJLYDts7EuawB9LQ0BD9/e7duzVt2rSUFQQAQCZEb+uSYjZOMbfDTrhKfO3a\ntXrnnXfU1NSkuXPnqqamRu+8844OHz4sSZo4caI2bNiQ9kIBAEil6PGanphV4gbPYScM7CeeeOKC\nx26//fa0FAMAQKZEh8SlmOM1R9iQOAAA2S7uPOyRuugMAIBs58Sdhx0+AMTkIXECGwCQk2L3EpfC\nc9kMiQMAYJhQzE5nkuTxeBgSBwDANNFV4tEOmyFxAACME+mwI7wWQ+IAABjH6Rv+jqwQ93gsBemw\nAQAwS3SVeN+fPR5Ltk2HDQCAUWL3EpckL4vOAAAwz4WrxC0F6bABADCL44SHw/s7bItV4gAAmCYU\ncqLdtRQObDpsAAAME3KcaHctRXY6o8MGAMAoznmB7WWVOAAA5gk5ihsS93gshZz+PcZNQ2ADAHLS\n+R22zxuORFN3OyOwAQA56fxFZ5Edz3qDBDYAAMZwnP6DP6TwHLYkYxeeEdgAgJwUXiXe/+dIYAfp\nsAEAMMcFq8T75rBNPQCEwAYA5KRQyFFMg93fYRt6axeBDQDISSGnf6GZFD78QyKwAQAwiuOc12F7\n6bABADBOeOOUmPuwua0LAADzXHD4R9+iMwIbAACDXLhKnNu6AAAwzvn3YUe2Ju1lDhsAAHM4ofPn\nsCND4rZbJQ2JwAYA5KQLOmwfi84AADCO4zhxe4n7WHQGAIB5zj8POxLYPb0MiQMAYIzzV4nn+foC\nmw4bAABznL+XOB02AAAGCjlO3F7ivr4Ou5vABgDAHM55p2jmRwK7h8AGAMAYoVD8KvE8n1cSHTYA\nAEY5f9EZHTYAAIZxHOeC27ry88Iddmd30KWqhkZgAwByTmT+msAGAMBgTl9ix+90ZsnrsdTZRWAD\nAGCEUF9gx85hW5alwgKf2jp73SprSAQ2ACDnhPqGxD1W/ONFBT61dfRkvqAkENgAgJwTCl3YYUtS\ncaFP5zp6ZIecgX7MVQQ2ACDnONEh8fjHS0blK+RIrW3dLlQ1tISBvX79es2ZM0dLly6NPtbc3KxV\nq1Zp8eLFWrVqlVpaWtJaJAAAqRSKrhKPT+wxJQWSpPqmjkyXlFDCwF6xYoWefvrpuMc2b96sOXPm\n6NVXX9WcOXO0efPmtBUIAECq9Q+Jxz9eNrpQkvRlY3umS0ooYWDPnj1bpaWlcY/V1tZq+fLlkqTl\ny5dr9+7d6akOAIA0GOi2LkmqGFMkSTrZ2JbxmhLxDeeHAoGAKisrJUkVFRUKBAJJ/dzGjRu1adOm\n4XwkAAApM1iHXV4aDuzTZ8zrsIcV2LEsy7pgDmAwNTU1qqmpiXusrq5OCxcuvNQyAABI2kD3YUtS\nSXG+LEtqbO50o6whDWuVeHl5uRoaGiRJDQ0NKisrS2lRAACkUygU/t/zh8S9Hkv+ojw1n8vCVeID\nWbBggbZv3y5J2r59Ox0yACCrDHZblySNKsxTq4GbpyQM7LVr1+p73/uejh8/rrlz5+r555/X/fff\nrz179mjx4sXau3ev7r///kzUCgBASgw2JC6Fdztr7+w1bvOUhHPYTzzxxICPb9myJeXFAACQCaHo\nKvELnysqCEdjZ3dQ/qK8TJY1JHY6AwDknMG2JpWkwvzwMZvthh0CQmADAHKOM8hOZ1L/udhdhp2L\nTWADAHLOYPdhS1JBX2B39hDYAAC4KjqHPcAkdqTD7uwisAEAcFW0wx7guTxfOBp7eu0MVpQYgQ0A\nyDmROeyBO+xwNHb1ENgAALgqeh/2AM/lecND4t102AAAuGuojVN8DIkDAGCGoe7DziewAQAwQ/8c\n9oXPRTrs7t5QBitKjMAGAOSc/lXiF3bYed5wNPYG6bABAHBVNLCH6LB76LABAHCXHT38Y4BFZ30d\ndg8dNgAA7nKGDOzwY7102AAAuGuovcTpsAEAMMRQt3X5oovO6LABAHBVKJkhcQIbAAB3hSLnYQ+Q\ngt6+DjtIYI8Mfz95QL/d9791svW026UAAC7SUEPiXk/47mzT5rB9bheQjc50nNX/2PM72Y6tk631\n+m+L1w/4lw4AMFMksAc4rEuWZcnrtRS06bCz3l+O7ZXthP/l9VnzCX3RctLligAAF2OoOWwpPCzO\nHPYI8M7JA/J5vFp65UJJ0oHTh1yuCABwMYYaEpckn4cOO+u1dp3T5811uqx0or5adrkk6ePAMXeL\nAgBclP4hcTrsEevwmU8lSVPGTNLoghIV543SZ00nXK4KAHAxhlolLoUXnhHYWe7jwHFJ0uTS8bIs\nS5X+cjW0B9TV2+VyZQCAZEXmsAc6rUsKb57CkHiW+/TsZ7IkTSipkiRVFJdLkk6eq3exKgDAxYgO\niQ/WYXvpsLOa4zg63nRCZaPGqsBXIEkqLxorSTrd1uBmaQCAi5Bw0RkddnZraD+jjt5OjfdXRh8r\nKxojSTp1rtGtsgAAFynhbV0ei53OstlnzXWSpOqS/sAeW1QqSWpoO+NKTQCAizfUximS5PV4FHIk\n26Aum8C+CJ819QW2vyL6WGlBiSSpsSPgSk0AgIsXHRIfJLG9fQeABCPLyQ1AYF+Ez/s67Cr/uOhj\nPq9P/vxiNbYT2ACQLZIZEpfMOgCEwL4InzXXqTh/lPz5xXGPjykcrUBHk0KOOX+xAIDBJVp0Fj2x\niyHx7NPW064zHWdVXVxxwXOjC/yynZBau865UBkA4GIlnsPu67AJ7OzzRXP4gI/Y4fCIkgK/JCnQ\n2ZzRmgAAw2M7Q89h+/o6bJPuxSawkzTQCvGI0X0LzwIdTRmtCQAwPJEO25toDpsOO/sc79svPHaF\neMTovg77LB02AGSFxHPY4cdtm1XiWed40wnle/NUPmrsBc+VFIQXoTV1tmS6LADAMNiJtibte6KX\nDju7dAd7VNd6SlXF4+QZ4GiXkvxwh93URWADQDZIeFuXlyHxrHS86YRCTkgTRlcP+LyfDhsAskrI\nTrBxCvdhZ6ejZ8NHak4cJLDzvXkq8OYT2ACQJeyEHTarxLPS4TOfSpImjx4/6Gv8+cVq6WrNVEkA\ngEvQv+hs4Od9fR22zdak2cNxHB1uPKqSAr9KC0cP+jp//ii1drfJDtkZrA4AMBz9G6cMNiROh511\n6lpPqbW7TZePmTTo8n8pPI/tyFFrd1sGqwMADEf/KvHsWXTmu5QfXrBggYqLi+XxeOT1evXiiy+m\nqi5jHDh9SJI0dexlQ76uOH+UJKm5qzV65CYAwEyhRIFt4MYplxTYkrRlyxaVlZWlohYjvfflQUnS\nFWVThnydPy+8Upx5bAAwX7JD4qwSzxLNXa36qOETTRxdHd0vfDCxHTYAwGx2osM/DBwSv+TAXr16\ntVasWKFnn302FfUY5f99/o4cOfpG1dcSvtZPYANA1ohunJJwSNycVeKXNCS+bds2VVVVKRAIaNWq\nVZo6dapmz5496Os3btyoTZs2XcpHZowdsvXKJ2/I5/ES2AAwwkT2CM+Z+7CrqqokSeXl5Vq0aJEO\nHjw45Otramp05MiRuF+1tbWXUkLavPX5O6pvP6OZ1V/XqLyihK8vzg/PYXMmNgCYL/kOewQEdkdH\nh9ra2qK/37Nnj6ZNm5aywtzU2dulbQd3yOfx6dtT/jWpnynOD4d6SzcdNgCYzu4L4kSndZkU2MMe\nEg8EAnrggQckSbZta+nSpZo7d27KCnPTsx/8p5q6WjT38uuG3Cwlls/jU6GvQM102ABgvP7DPwZ+\n3mfgxinDDuzJkyfr5ZdfTmUtRvis6YT+78d/UVnRGH37suS664jivFHc1gUAWSAbN07htq7z/MeB\nF+TI0XemL5DPe3H/ninOH6Vz3e1sTwoAhrNtRx5riCHxyH3YBLaZDjce1T/rj+iKsikJN0oZiD9/\nlBw5Osf2pABgNDsUGrS7lmI7bHNu6yKwY+z8OLxife6U64b186P6bu1q6WYeGwBMZocceTyDRyDn\nYRusuatVfz95UNX+Ck0unTCs9/Dn9QU2C88AwGi27WiIvJZvpN2HPZLs/eLvCjkhXTN+xpCncg2F\n7UkBIDvYIUfeIf5bH+2wQwS2cd6ue1+WpBkV04f9Hv78yAEgdNgAYLJQwjlsDv8w0rnuNh1uPKpJ\no8fLX1A87PfxR+ew6bABwGRB2xl0W1JJ8vUtOutllbhZDtYfkiNH08qnXtL7RIfEOwlsADBZeNHZ\nUEPidNhGOnj6sCTpirLLLul96LABIDvY9tBD4h6PJY/FojPjfNhwREW+AlWXVF7S++R581TgzafD\nBgDD2SEnurBsMF6vhyFxk5zpOKuG9oAuGzNJHuvSL0dx/ig102EDgNGCCTpsKbx5CkPiBjnc+Kkk\n6bLSiSl5P39+sVq72hQy6FYAAEC8UBIdts/jYUjcJEfORAJ7eJulnC+yPWkru50BgLESrRKXwh02\nQ+IG+SRwXD6PV+Mvcf46orjvXmw2TwEAc4X3Eh86An1ej4JBcw5zyunA7gn26PPmOlX7K+X1eFPy\nniV9gd3U1ZKS9wMApJYdcuQ4SrzojCFxcxxvPiHbCWni6OqUvWdk45UmVooDgJEiR2ZGTuQajM9r\nEdim+PTs55KkCSVVKXvPaIfd2Zyy9wQApI4dCexEi858HvUQ2GaIBHYqO+ySaIfNkDgAmCjSNXsT\nzWF7PAqFHNkhM87EzunAPnb2CxV481VWNCZl71mS75cknWUOGwCMFAlgT6IhcV/fEZu9Ziw8y9nA\n7uzt0pfn6jW+pHLYx2kOpCivUD6PlyFxADBUMNkhccMOAMnZwD7edEKOHI1P4fy1JFmWJX9+sc4S\n2ABgpEhg+xIGdjgie+iw3XWs6QtJStn917FGF/jV3NkqO2TGXzIAoF90Dtub+D7s2Ne7LecDO5Ur\nxCNGF5TIkcO92ABgoGAw2SFxOmwjHG9K/YKziNGFJZKkQEdTyt8bAHBp+u/DHjoC83zhQO/ppcN2\nTVdvl75srVd1ihecRZQWhAP7TMfZlL83AODSBO3wKvHEHXZ4B8xuOmz3HG8OLzhLx3C4JJX2ddhn\n2umwAcA0kSHuyG1bg8mL3NZlyH7iORnYn55N34IzSRpTOFqS1NgRSMv7AwCGrzfJVeKRwO7uIbBd\ncyyyw1lJ6nY4i1UaCex2hsQBwDS9fXPSyXbYzGG76NOzn6vQV6CxRaVpef9CX4EKfQVqaD+TlvcH\nAAxfZIjbl3DRWV+H3RtMe03JyLnAbutu16m2Bk0sqU7LgrOIsUWlamg7o5Bjxr/MAABhkQM9kg5s\nhsTdcfTsZ5KkCSk88GMgZUVj1BsKcggIABgmsugsL8GQeL6PVeKu+jhwTJI0uXR8Wj9nbN/93afO\nNaT1cwAAFyca2Ik67Lzw81102O44ciYc2Kk8UnMg40aNlSR9ea4+rZ8DALg43UkuOivIC3fYnd3M\nYWecHbL18ZljGjeqTKPyitL6WeNGlUmSvmw9ndbPAQBcnO6ecAAnHBLvC2w6bBccbzqhbrtHl42Z\nmPbPigT2idYv0/5ZAIDkRRaRReaoBxPtsLvosDPug4YjkqTLx0xK+2cV+PI1pnC0vmgmsAHAJJGO\nOT8vwZB4PkPirvln/SFJ0lfGTs7I51X5x6ml+xxnYwOAQTr7hsQjQ96DiTzf3tWb9pqSkTOB3dXb\npUONR1Xlr5A/vzgjnzm+b6/yY31boQIA3BfpmAsSBLbHslSQ71V7J4GdUQfqDykYsjWt/CsZ+8zI\n1qefBI5n7DMBAEPr7ArKYyVedCZJRQU+tRHYmfX2if2SpKvGXZGxz5xcOl6WLB1q/CRjnwkAGFpb\nZ68KC3xJ7XY5qsCnc+09GagqsZwI7M7eLr178oDGFpVGh6kzocBXoAklVfokcFwdvZ0Z+1wAwODa\nO3tUmO9L6rXFRXnq7rWN2O0sJwL7rc/fUbfdo5nVX0/r/uEDuaJ8imwnpPdPfZTRzwUAXMhxHLW2\n96i4MPnAlqSm1q50lpWUER/YdsjWriO18loe/cv4qzP++V+vmCZJeuvztzP+2QCAeG2dvQrajvyj\n8pN6/ei+150lsNPvtU/f0qm2Bl0zfoZKCvwZ//wqf4XGl1TqH19+wL7iAOCyM83h6cnRxckF9piS\nAklS/dmOtNWUrEsK7DfffFNLlizRokWLtHnz5lTVlDLHzn6urQdeUpGvUP/lK3Ncq+OGy2bLkaNn\n9j/HcZsA4KLTgXZJUtnowqReXzEmvI31ifpzaaspWcMObNu2tWHDBj399NPatWuXdu7cqaNHj6ay\ntmHrCnbrtaNv6b/+5Sn12D367tcWZ+ze64FcVTFNU8depv2nPtT/fPv3autud60WAMhll48v1dVX\nlGvG1PKkXj+hwi9L0ofHAuktLAnDDuyDBw9qypQpmjx5svLz83XTTTeptrY2lbUNy1+P79OqF/9N\nv3vvj7IdWyu+/h19rSJzt3INxLIs3TrjO5pQUqU3P39b/+vvf3C1HgDIVePHFesnd16bdIddVODT\nN746TnkJ9h3PhOSWyQ2gvr5e1dX9R1RWVVXp4MGDQ/7Mxo0btWnTpuF+ZFKK8gr11fLLNb18qhZM\n/ZbGFpWm9fMuxoaF/6Z9X/xDUzJw+AgAYGDlpYVJz2FL0iOr/lVFBcOOy5TJaAU1NTWqqamJe6yu\nrk4LFy5M2WdcN+laXTfp2pS9X6otmTbP7RIAIKfl+bxGdMwXa9hD4lVVVTp9uv+s5/r6elVVZW5T\nEgAAcsmwA/sb3/iGPvvsM504cUI9PT3atWuXFixYkMraAABAn2EPift8Pv3qV7/SfffdJ9u2deut\nt2ratGmprA0AAPS5pDnsefPmad485mQBAEi3Eb/TGQAAIwGBDQBAFiCwAQDIAgQ2AABZgMAGACAL\nENgAAGQBAhsAgCzg+m7mtm1LUtw2pwAAjFSRvIvkX7JcD+zGxkZJ0l133eVyJQAAZE5jY6OmTJmS\n9Ostx3GcNNaTUFdXlz744ANVVFTI6zXn9JSFCxcacb63m7gGYVyHMK4D1yCC63Bp18C2bTU2Nurq\nq69WYWFy53JLBnTYhYWFmjX+wLuOAAAFnUlEQVRrlttlDGjSpElul+A6rkEY1yGM68A1iOA6XNo1\nuJjOOoJFZwAAZAECGwCALEBgAwCQBby//vWvf+12Eaa67rrr3C7BdVyDMK5DGNeBaxDBdcj8NXB9\nlTgAAEiMIXEAALIAgQ0AQBYgsAEAyAIENgAAWYDABgAgC7i+NanbFixYoOLiYnk8Hnm9Xr344otx\nzzuOo3//93/XG2+8ocLCQj3++OOaMWOGS9WmR6Jr8Pbbb+tHP/pRdBu+RYsW6cc//rEbpaZVa2ur\nfvnLX+rjjz+WZVn6zW9+o2uvvTb6fC58FxJdg1z4Lhw7dkwPPvhg9M8nTpzQmjVrdM8990QfG+nf\nhWSuQS58FyTpmWee0fPPPy/LsjR9+nQ99thjKigoiD7f09OjdevW6cMPP9SYMWP05JNPpm/bVifH\nzZ8/3wkEAoM+/9e//tVZvXq1EwqFnP379zu33XZbBqvLjETX4G9/+5tz//33Z7Aid6xbt8557rnn\nHMdxnO7ubqelpSXu+Vz4LiS6BrnyXYgIBoPOt771Laeuri7u8Vz4LkQMdg1y4btw+vRpZ/78+U5n\nZ6fjOI6zZs0a54UXXoh7zR/+8Afn0UcfdRzHcXbu3On85Cc/SVs9DIknUFtbq+XLl8uyLF1zzTVq\nbW1VQ0OD22Uhxc6dO6d3331Xt912myQpPz9fo0ePjnvNSP8uJHMNcs2+ffs0efJkTZw4Me7xkf5d\niDXYNcgVtm2rq6tLwWBQXV1dqqysjHv+9ddf1y233CJJWrJkifbt2ycnTdubENiSVq9erRUrVujZ\nZ5+94Ln6+npVV1dH/1xdXa36+vpMlpcRQ10DSXr//ff13e9+V/fdd58++eSTDFeXfnV1dSorK9P6\n9eu1fPlyPfLII+ro6Ih7zUj/LiRzDaSR/12ItWvXLi1duvSCx0f6dyHWYNdAGvnfhaqqKt17772a\nP3++brjhBvn9ft1www1xr6mvr9f48eMlST6fTyUlJWpqakpLPTkf2Nu2bdNLL72k3/3ud9q6dave\nffddt0vKuETXYMaMGXr99df18ssv6wc/+IEeeOABlypNn2AwqI8++kgrV67U9u3bVVRUpM2bN7td\nVkYlcw1y4bsQ0dPTo9dff1033nij26W4ZqhrkAvfhZaWFtXW1qq2tlZvvfWWOjs7tWPHDtfqyfnA\nrqqqkiSVl5dr0aJFOnjw4AXPnz59Ovrn06dPR39mpEh0Dfx+v4qLiyVJ8+bNUzAY1NmzZzNeZzpV\nV1erurpaM2fOlCTdeOON+uijj+JeM9K/C8lcg1z4LkS8+eabmjFjhsaNG3fBcyP9uxAx1DXIhe/C\n3r17NWnSJJWVlSkvL0+LFy/W/v37415TVVWlU6dOSQr/o/fcuXMaO3ZsWurJ6cDu6OhQW1tb9Pd7\n9uzRtGnT4l6zYMECbd++XY7j6P3331dJSckFcxjZLJlr0NjYGJ2TOXjwoEKhUNq+kG6pqKhQdXW1\njh07Jik8b3fFFVfEvWakfxeSuQa58F2I2LVrl2666aYBnxvp34WIoa5BLnwXJkyYoAMHDqizs1OO\n4wz634WXXnpJkvTKK6/om9/8pizLSks9OX1bVyAQiA7j2LatpUuXau7cudq2bZskaeXKlZo3b57e\neOMNLVq0SEVFRfrNb37jZskpl8w1eOWVV7Rt2zZ5vV4VFhbqiSeeSNsX0k2PPvqofvazn6m3t1eT\nJ0/WY489llPfBSnxNciV70JHR4f27t2rDRs2RB/Lte9ComuQC9+FmTNnasmSJbrlllvk8/l01VVX\n6c4779Rvf/tbXX311Vq4cKFuu+02/fznP9eiRYtUWlqqJ598Mm31cFoXAABZIKeHxAEAyBYENgAA\nWYDABgAgCxDYAABkAQIbAIAsQGADAJAFCGwAALIAgQ0AQBb4/1MKuAisMjLJAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f35586c4a50>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "for i in evolved_seqs:\n",
    "    sns.kdeplot(evolved_seqs[i]['prediction'], shade=True, legend=True, label=i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>utr</th>\n",
       "      <th>prediction</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AAGTTGACGTAGAATTATACTTTCGGATCTTCATTGCACTAGACGTTGAT</td>\n",
       "      <td>7.94126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>GAGAGAAAAAAGAGTTCAGAGTTAGTAGTGTACAGAGAGATCTGTTGGAT</td>\n",
       "      <td>7.93827</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>TTGGATCTCGTTTTATTTACTTGGAGTACAGAGTCGTATTACTAGATCCC</td>\n",
       "      <td>7.9233</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>GTGGAGTTTATCCAATCTACTTGGATTCCGAGTTCTTTGCAGGAAGAGCT</td>\n",
       "      <td>7.91531</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>ATCAATTGAGAGAGTCTTACAAATTCGATTTGGACGTCGACGCGAGGGTC</td>\n",
       "      <td>7.91183</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  utr prediction\n",
       "4  AAGTTGACGTAGAATTATACTTTCGGATCTTCATTGCACTAGACGTTGAT    7.94126\n",
       "0  GAGAGAAAAAAGAGTTCAGAGTTAGTAGTGTACAGAGAGATCTGTTGGAT    7.93827\n",
       "9  TTGGATCTCGTTTTATTTACTTGGAGTACAGAGTCGTATTACTAGATCCC     7.9233\n",
       "1  GTGGAGTTTATCCAATCTACTTGGATTCCGAGTTCTTTGCAGGAAGAGCT    7.91531\n",
       "6  ATCAATTGAGAGAGTCTTACAAATTCGATTTGGACGTCGACGCGAGGGTC    7.91183"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "evolved_seqs[8].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
